---
title: "Grid"
author: "Francesco Bailo"
date: "25/04/2020"
output: html_document
---

```{r setup, include=FALSE}
library(dplyr)
library(knitr)
library(sf)
library(ggplot2)
library(viridis)
library(knitr)
```

```{r}

ita_buffer.sf <- 
  read_sf("../shapefile/Italy/Limiti01012020/Reg01012020/Reg01012020_WGS84.shp") %>%
  st_combine() %>%
  st_buffer(1000)

hex_ita_10km.sf <- 
  st_make_grid(ita_buffer.sf,
               cellsize = 10000, 
               square = FALSE) %>%
  st_sf(hex_id = 0)
hex_ita_10km.sf$hex_id <- 1:nrow(hex_ita_10km.sf)
```

```{r}
ggplot(hex_ita_10km.sf) + geom_sf()
```

## Intersect with census sections

```{r}
sez2011_centroids.df <- read.csv("../census_2011/census_lonlat_sez_2011.csv")
sez2011_centroids.sf <-
  st_as_sf(sez2011_centroids.df, coords = c("lon", "lat"), crs = 4326)

res_sez_2011 <- 
  st_intersects(
    sez2011_centroids.sf %>% 
      st_transform(32632), 
    hex_ita_10km.sf)
is.na(res_sez_2011) <- lengths(res_sez_2011) == 0

sez2011_centroids.sf$hex_id <- unlist(res_sez_2011)
```

## Add census data

### Population

```{r}
sez2011_population.df <- read.csv("../census_2011/census_population_sez_2011.csv")

sez2011_population.df$hex_id <- 
  sez2011_centroids.sf$hex_id[match(sez2011_population.df$sez2011,
                                    sez2011_centroids.sf$sez2011)]

sez2011_population.df$E6 <- 
  as.numeric(as.character(sez2011_population.df$E6))
sez2011_population.df$E6[is.na(sez2011_population.df$E6)] <- 0

hex_population.df <- 
  sez2011_population.df %>%
  dplyr::select(hex_id, CODREG:PROCOM, P1:E31) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::mutate_at(vars(CODREG:PROCOM), function(x) names(which.max(table(x)))) %>% # This generate inconsistencies (e.g. regione vs CODREG)
  dplyr::mutate_at(vars(P1:E31), sum, na.rm = TRUE) %>%
  dplyr::distinct()

hex_ita_10km.sf <-
  merge(hex_ita_10km.sf, 
        hex_population.df, 
        by = "hex_id",
        all.x = T)
```

### Industry

```{r}
require(readr)
require(stringr)

sez2011_industry.df <- 
  read_csv("../census_2011/census_industry_sez_2011.csv.zip",
           col_types = list(TIPO_SOGGETTO = col_character(),
                            CODREG = col_double(),
                            PROCOM = col_double(),
                            NSEZ = col_double(),
                            ATECO3 = col_double(),
                            NUM_UNITA = col_double(),
                            ADDETTI = col_double(),
                            ALTRI_RETRIB = col_double(),
                            VOLONTARI = col_double(),
                            SEZ2011 = col_double()))

sez2011_industry.df$ALTRI_RETRIB[is.na(sez2011_industry.df$ALTRI_RETRIB)] <- 0
sez2011_industry.df$VOLONTARI[is.na(sez2011_industry.df$VOLONTARI)] <- 0

extractAtecoD2 <- function(x) {
  return(as.numeric(gsub("\\d$", "", str_extract(x, "\\d{3}"))))
}

extractAtecoD3 <- function(x) {
  return(as.numeric(str_extract(x, "\\d{3}")))
}

sez2011_industry.df <-
  sez2011_industry.df %>%
  dplyr::mutate(manufacturing = 
           extractAtecoD2(ATECO3) %in% 10:33,
         construction = 
           extractAtecoD2(ATECO3) %in% 41:43,
         wholesaling = 
           extractAtecoD3(ATECO3) %in% 451:469,
        retail = 
          extractAtecoD3(ATECO3) %in% 471:479,
        transport = 
          extractAtecoD3(ATECO3) %in% 491:512,
        warehousing = 
          extractAtecoD3(ATECO3) %in% 521:522,
        delivery = 
          extractAtecoD3(ATECO3) %in% 531:532,
        hospitality = 
          extractAtecoD3(ATECO3) %in% 551:563,
        services = 
          extractAtecoD3(ATECO3) %in% 581:829,
        education = 
          extractAtecoD3(ATECO3) %in% 851:856,
        healthcare = 
          extractAtecoD3(ATECO3) %in% 861:869,
        agedcare = 
          extractAtecoD3(ATECO3) %in% 873)

sez2011_industry.df$manufacturing.meat <- 
  extractAtecoD3(sez2011_industry.df$ATECO3) == 101

sez2011_industry.df$manufacturing.ex_meat <- 
  sez2011_industry.df$manufacturing & !sez2011_industry.df$manufacturing.meat

sez2011_industry.df$manufacturing <- NULL

res <- 
  apply(sez2011_industry.df %>% 
        select(construction:manufacturing.ex_meat), 
      1, 
      function(x) ifelse(length(names(x)[x]) > 0, names(x)[x], NA))

sez2011_industry.df$ATECO3_code <- res
  
sez2011_industry.df <- 
  sez2011_industry.df %>%
  dplyr::select(-manufacturing.meat, -manufacturing.ex_meat, -construction, 
                -wholesaling, -retail, -transport, -warehousing, -delivery, -hospitality,
                -services, -education, -healthcare, -agedcare)


sez2011_industry.df$ATECO3_code[is.na(sez2011_industry.df$ATECO3_code)] <- 'other'

sez2011_industry.df %>%
  group_by(ATECO3_code) %>%
  summarise(n = sum(ADDETTI) + sum(ALTRI_RETRIB) + sum(VOLONTARI)) %>%
  mutate(freq = n / sum(n))

sez2011_industry.df$hex_id <- 
  sez2011_centroids.sf$hex_id[match(sez2011_industry.df$SEZ2011,
                                    sez2011_centroids.sf$sez2011)]

hex_industry.df <- 
  sez2011_industry.df %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarize(n_companies = n(),
                   mean_company_units = mean(NUM_UNITA, na.rm=T),
                   sum_employees = 
                     sum(ADDETTI) + sum(ALTRI_RETRIB) + sum(VOLONTARI),
                   sum_agedcare = 
                     sum(ADDETTI[ATECO3_code == "agedcare"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "agedcare"]) + 
                     sum(VOLONTARI[ATECO3_code == "agedcare"]),
                   sum_construction = 
                     sum(ADDETTI[ATECO3_code == "construction"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "construction"]) + 
                     sum(VOLONTARI[ATECO3_code == "construction"]),
                   sum_delivery = 
                     sum(ADDETTI[ATECO3_code == "delivery"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "delivery"]) + 
                     sum(VOLONTARI[ATECO3_code == "delivery"]),
                   sum_education = 
                     sum(ADDETTI[ATECO3_code == "education"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "education"]) + 
                     sum(VOLONTARI[ATECO3_code == "education"]),
                   sum_healthcare = 
                     sum(ADDETTI[ATECO3_code == "healthcare"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "healthcare"]) + 
                     sum(VOLONTARI[ATECO3_code == "healthcare"]),
                   sum_hospitality = 
                     sum(ADDETTI[ATECO3_code == "hospitality"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "hospitality"]) + 
                     sum(VOLONTARI[ATECO3_code == "hospitality"]),
                    sum_manufacturing.ex_meat = 
                     sum(ADDETTI[ATECO3_code == "manufacturing.ex_meat"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "manufacturing.ex_meat"]) + 
                     sum(VOLONTARI[ATECO3_code == "manufacturing.ex_meat"]),
                   sum_manufacturing.meat = 
                     sum(ADDETTI[ATECO3_code == "manufacturing.meat"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "manufacturing.meat"]) + 
                     sum(VOLONTARI[ATECO3_code == "manufacturing.meat"]),
                   sum_other = 
                     sum(ADDETTI[ATECO3_code == "other"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "other"]) + 
                     sum(VOLONTARI[ATECO3_code == "other"]),
                   sum_retail = 
                     sum(ADDETTI[ATECO3_code == "retail"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "retail"]) + 
                     sum(VOLONTARI[ATECO3_code == "retail"]),
                   sum_services = 
                     sum(ADDETTI[ATECO3_code == "services"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "services"]) + 
                     sum(VOLONTARI[ATECO3_code == "services"]),
                   sum_transport = 
                     sum(ADDETTI[ATECO3_code == "transport"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "transport"]) + 
                     sum(VOLONTARI[ATECO3_code == "transport"]),
                   sum_warehousing = 
                     sum(ADDETTI[ATECO3_code == "warehousing"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "warehousing"]) + 
                     sum(VOLONTARI[ATECO3_code == "warehousing"]),
                   sum_wholesaling = 
                     sum(ADDETTI[ATECO3_code == "wholesaling"]) + 
                     sum(ALTRI_RETRIB[ATECO3_code == "wholesaling"]) + 
                     sum(VOLONTARI[ATECO3_code == "wholesaling"]),
                   median_employees = median(ADDETTI, na.rm=T))


hex_ita_10km.sf <-
  merge(hex_ita_10km.sf, 
        hex_industry.df, 
        by = "hex_id",
        all.x = T)
```


```{r}
ggplot(hex_ita_10km.sf) +
  geom_sf(aes(fill = P1)) +
  scale_fill_viridis()

ggplot(hex_ita_10km.sf) +
  geom_sf(aes(fill = log(sum_manufacturing.meat / P1)+1), colour = NA) +
  scale_fill_viridis()
```

## Save

```{r}
save(hex_ita_10km.sf, file = "hex_ita_10km.sf.RData")
```

